## Create arrest Data
library(tidyverse)
library(openintro) #to convert state names
library(Hmisc) #for capitalize
options(scipen=0, digits=7) #to standardize number of digits when exporting data. all data here is run using these options.

source("lib/arrest-functions.R")
# Load every yearly agency-level arrest file and stack them into one table.
arrest_path <- "raw-data/FOIA-LESO-controls"
# The dot is escaped and the end anchored so that only names ending in
# "agencyarrests20YY.csv" match; an unescaped "." matches any character and an
# unanchored pattern also accepts names such as "agencyarrests2010.csv.bak".
arrest_files <- list.files(path = arrest_path,
                           pattern = "agencyarrests20[0-9][0-9]\\.csv$")
# Fail early with a clear message rather than handing an empty table to the
# cleaning steps below.
if (length(arrest_files) == 0) {
  stop("No agency arrest files found in ", arrest_path, call. = FALSE)
}
temparrest <- map(file.path(arrest_path, arrest_files), read_csv)
arrest <- bind_rows(temparrest)

# Do some cleaning, reduce number of variables -------
# change_colnames() is not defined in this script; presumably it comes from
# lib/arrest-functions.R sourced above.
arrest <- change_colnames(arrest)

# ORI lookup table: ORI codes are used to match each agency to the right city.
# Only the identifier and location columns listed here are kept.
ORI <- select(
  read_csv("raw-data/ORI_codes.csv"),
  FPLACE, FIPS_ST, FIPS_COUNTY, FIPS, ORI7, ORI9, NAME, STATENAME, COUNTYNAME,
  AGCYTYPE, ADDRESS_CITY, ADDRESS_STATE, INTPTLAT, INTPTLONG, ADDRESS_ZIP
)
# Suffix the first 14 kept columns with "_ORI" so their origin stays visible
# after the merge. ADDRESS_ZIP is the 15th column and keeps its original name.
names(ORI)[1:14] <- paste0(names(ORI)[1:14], "_ORI")

# Aggregate arrests into crime categories, then trim the table.
# make_crime_variables() is a project helper (presumably from
# lib/arrest-functions.R); the category columns selected below are expected to
# exist after it runs.
arrest <- make_crime_variables(arrest) %>%
  # keep the agency descriptor columns plus the five crime categories
  select(ID_CODE:ZIP_CODE, Murder, Rape, Robbery, Burglary, Assault) %>%
  # total population is the sum of the three POP_* columns
  mutate(Population = POP_1 + POP_2 + POP_3) %>%
  # delete the variables we don't need
  select(-c(GROUP_NUMBER, DIVISION, CITY_SEQUENCE, CORE_CITY, COVERED_BY_CODE,
            LAST_UPDATE, FIELD_OFFICE, AGENCY_COUNT, COUNTY_1, MSA_1, COUNTY_2,
            MSA_2, COUNTY_3, MSA_3, POP_2, POP_3, FOLLOW_UP,
            SPECIAL_MAILING_GROUP, SPECIAL_MAILING_ADDRESS))

# data editing and merge arrests with ORI data
# inner_join keeps only agencies whose ORI appears in the lookup, so some
# observations are dropped because they have old ORI codes.
arrest <- inner_join(arrest, ORI, by = c("ORI" = "ORI7_ORI")) %>%
  select(-ID_CODE, -STATE_CODE, -AGENCY_STATE_NAME) %>%
  rename(City = ADDRESS_CITY_ORI,
         County = COUNTYNAME_ORI,
         State = STATENAME_ORI,
         State_abbrv = ADDRESS_STATE_ORI) %>%
  mutate(City = str_trim(str_to_title(City), side = "right"),
         State = str_trim(str_to_title(State), side = "right"),
         County = str_trim(str_to_title(County), side = "right"),
         Assault = replace(Assault, which(Assault < 0), 0), # set negative to 0
         # raw count total across the five categories, taken before the
         # counts are converted to rates
         zero = Murder + Rape + Robbery + Burglary + Assault) %>%
  # Convert counts to rates per 100,000 population. A formula lambda is used
  # because funs() has been deprecated since dplyr 0.8.0.
  mutate_at(.vars = vars(Murder:Assault),
            .funs = ~ . / Population * 100000) %>%
  # finally, make sure it's obvious that the arrests data is for arrests of
  # certain crimes: every column NOT listed here gets the "_ArrestRate" suffix
  rename_at(vars(-c(City, State_abbrv, YEAR, State, County, FIPS_ST_ORI,
                    FIPS_COUNTY_ORI, FIPS_ORI, NAME_ORI, INTPTLAT_ORI,
                    INTPTLONG_ORI, AGCYTYPE_ORI, FPLACE_ORI, zero, Population)),
            function(x) paste0(x, "_ArrestRate")) %>%
  arrange(State, City, YEAR) %>%
  # a zero Population yields Inf rates; set those to 0 (NaN from 0/0 is left
  # untouched here)
  mutate_if(is.numeric, function(x) ifelse(is.infinite(x), 0, x))

#and, aggregate to city
# Sum the agency-level arrest rates within each City/State/YEAR cell, ignoring
# missing values. The summary drops only the last grouping level, so the saved
# table remains grouped by City and State.
arrest_city <- arrest %>%
  group_by(City, State, YEAR) %>%
  summarize_at(c("Murder_ArrestRate", "Rape_ArrestRate", "Robbery_ArrestRate",
                 "Burglary_ArrestRate", "Assault_ArrestRate"),
               sum, na.rm = TRUE) # TRUE, not T: T is an ordinary variable that can be reassigned

saveRDS(arrest_city, file = "data/arrests-by-city.RDS")

## county level arrest data -------
# Load every yearly county-summary arrest file from arrest_path (set earlier
# in this script). The dot is escaped and the end anchored so only names
# ending in "countysumarrests20YY.csv" match.
countyarrest_files <- list.files(path = arrest_path,
                                 pattern = "countysumarrests20[0-9][0-9]\\.csv$")
# Fail early with a clear message rather than building an empty table.
if (length(countyarrest_files) == 0) {
  stop("No county arrest files found in ", arrest_path, call. = FALSE)
}
tempcountyarrest <- map(file.path(arrest_path, countyarrest_files), read_csv)

countyarrest <- bind_rows(tempcountyarrest) %>%
  # Zero-pad the FIPS codes to their standard widths: 2 digits for the state,
  # 3 digits for the county (a width of 2 left single-digit counties as "01"
  # instead of "001"). pad is given as the string "0".
  # NOTE(review): assumes FIPS_CTY holds the standard 3-digit county code;
  # confirm against the raw files.
  mutate(FIPS_ST = str_pad(FIPS_ST, width = 2, pad = "0", side = "left"),
         FIPS_CTY = str_pad(FIPS_CTY, width = 3, pad = "0", side = "left")) %>%
  select(FIPS_ST, FIPS_CTY, year, P1VLNT, P1PRPTY, MURDER, RAPE, OTHASLT,
         ROBBERY, AGASSLT, BURGLRY, LARCENY, MVTHEFT, ARSON, WEAPONS, DRGSALE,
         DRGPOSS, CPOPARST) %>%
  # Convert counts to rates per 100,000 of CPOPARST, then drop the
  # denominator. A formula lambda is used because funs() has been deprecated
  # since dplyr 0.8.0.
  mutate_at(.vars = vars(P1VLNT:DRGPOSS),
            .funs = ~ . / CPOPARST * 100000) %>%
  select(-CPOPARST) %>%
  # label every measure column as an arrest rate
  rename_at(vars(-c(FIPS_ST, FIPS_CTY, year)),
            function(x) paste0(x, "_ArrestRate")) %>%
  # a zero denominator yields Inf; set those to 0
  mutate_if(is.numeric, function(x) ifelse(is.infinite(x), 0, x))

save(countyarrest, file = 'data/arrest_county.RData')
